﻿# -*- coding: utf-8 -*-
'''
Python Recursive Serializer, PRSerializer 0.1.4
Felipe A. Hernandez
spayder26 at gmail dot com

License:
    This library is licensed under LGPLv3.
    http://www.gnu.org/licenses/lgpl-3.0.html

Usage:
    Like pickle, this module has both dumps and loads functions. But
    serializable classes must be registered for security reasons and
    they must have both __getstate__ and __setstate__ methods for
    pickling and unpickling respectively.

Functions:
    * serializable( class_object or str_class_alias [, str_class_alias] )
      SerializerClass.serializable( class_object or str_class_alias [, str_class_alias] )
        It receives any class type, and it will be registered by
        Serializer.
        If str_class_alias is given instead of class_object as the first
        parameter, a new decorator function will be returned for
        registering the class type with the given alias.
        Received classtypes must have both __getstate__ and __setstate__
        methods, __getstate__ must return a serializable object that
        __setstate__ will create the class when unserializing (should be a
        classmethod constructor, or a convenience method).

        Can be used as class decorator @serializable [ ( str_class_alias ) ] in Python 2.6+.

    * dumps( serializable_object [, unused_protocol ] )
      Serializer.dumps( serializable_object [, unused_protocol ] )
        Receives a supported object or inherited from supported.
        Supported types are:
            bool, None, str, int, long, float, complex, list, tuple,
            xrange, dict

        Class instances are also supported when their type has been
        registered with the serializable function.

    * loads( serialized_str )
      Serializer.loads( serialized_str )
        Receives a serialized string (generated by dumps) and converts
        it to Python objects. When a class object is loaded and its
        class is not registered on the serializer, an error is raised.

Exceptions:
    * ClassRegistrationError
        Raised when a class does not meet the requirements described
        above.

    * SerializationError
        Raised when trying to serialize an unsupported object or a
        non-registered class instance.

    * UnserializationError
        Raised when there is an error while trying to unserialize a
        string. It is commonly raised when trying to load a malformed
        string which was not generated by the module itself.

Notes:
    * It's simple because its core is coded in less than 200 lines.
    * It's pure-Python, so its performance is not great, and it's nearly
      2x faster unserializing than serializing.
    * It's safe, because it only handles some Python builtin types and
      specified classes with handler methods.
    * It's around 35% faster than pickle.
    * Shorter output than pickle on old protocols (0 and 1), but a bit
      longer than the latest one (protocol 2 on 2.6), although it works
      better with compression.
    * Uses only printable characters (if no compression is chosen).

Example:
    >>> # PRSerializer needs class registration
    >>> @serializable("myclass")
    ... class MyClass(object):
    ...    foo = True
    ...    bar = None
    ...
    ...    def __init__(self):
    ...        self.foo = xrange(1000)
    ...
    ...    def __getstate__(self):
    ...        # Required by PRSerializer, returned data will be serialized
    ...        return (self.foo, self.bar)
    ...
    ...    @classmethod
    ...    def __setstate__(cls,o):
    ...        # Required by PRSerializer, this should generate the new object
    ...        self = cls()
    ...        self.foo = o[0]
    ...        self.bar = o[1]
    ...        return self
    ...
    ...    def __repr__(self):
    ...        return "<MyClass object with foo=%s bar=%s>" % (
    ...            repr(self.foo), repr(self.bar))
    ...
    >>> instance = MyClass()
    >>> instance.foo = "Text string"
    
    >>> # Serialization
    >>> a = dumps(instance)
    >>> print "Serialized string:", repr(a)
    Serialized string: '0.1.4:omyclass:psText string;0;n'

    >>> # Unserialization
    >>> b = loads(a)
    >>> print "Instance:", repr(b)
    Instance: <MyClass object with foo='Text string' bar=None>
'''

import itertools
import sys
import types
import zlib

# Module metadata. __version__ is also written as the prefix of every
# serialized string and compared against that prefix when loading.
__author__ = "Felipe A. Hernandez"
__authemail__ = "spayder26 at gmail dot com"
__version__ = "0.1.4"
__license__ = "LGPLv3"

class ClassRegistrationError(Exception):
    '''Raised when a class cannot be registered as serializable, or when
    an instance or alias of a non-registered class is found.'''


# Backward-compatible alias: the exception was originally published under
# this misspelled name, which existing code may still reference.
ClassRegistrarionError = ClassRegistrationError

class SerializationError(IOError):
    '''Error raised while converting an object to its serialized string.'''

class UnserializationError(IOError):
    '''Error raised while converting a serialized string back to objects.'''

class SerializerClass(object):
    '''Serializer and unserializer class.'''

    __detect_range = sys.version_info[0] < 3

    compression = 1
    def __init__(self, compression = 0):
        '''Initializes the serializer-unserializer class.'''
        self.__classAlias = {}
        self.__aliasClass = {}
        self.__classGetState = {}
        self.__classSetState = {}
        
        self.compression = compression
        
    def serializable(self, class_or_alias = None, alias = None, getstate = None, setstate = None):
        '''Mark classtype as serializable. Usable as class decorator.

        Args:
            class_or_alias: optional, class, or alias if string is given.
            alias: optional, alias as string, incompatible with class_or_alias as alias.
            getstate: optional, function whose return will be serialized.
            setstate: optional, function will be called with unserialized data.

        Returns:
            Decorator if no ClassType is given in class_or_alias, or the
            same ClassType given in class_or_alias if given.

        Usage:
            
            @serializer.serializable(alias, getstate = lambda self, data:)
            '''
        if type(class_or_alias) in (types.StringType, types.NoneType):
            assert alias is None or class_or_alias is None, "Alias cannot be given twice (class_or_alias and alias parameters are both exclusives)."
            return lambda x: self.serializable(x, class_or_alias or alias, getstate, setstate)
        
        if alias is None:
            alias = class_or_alias.__name__
            if alias in self.__aliasClass:
                raise ClassRegistrarionError, "Two classes have the same name, please provide aliases."
        elif alias in self.__aliasClass:
            raise ClassRegistrarionError, "Two classes have the same alias, please provide a different one."

        try:
            if getstate is None:
                getstate = class_or_alias.__getstate__
        except AttributeError:
            raise ClassRegistrarionError,"Serializable classes must have an __getstate__ method if not 'getstate' parameter is given to 'serializable' decorator or method."

        try:
            if setstate is None:
                setstate = class_or_alias.__setstate__
        except AttributeError:
            raise ClassRegistrarionError,"Serializable classes must have an __setstate__ method if not 'setstate' parameter is given to 'serializable' decorator or method."

        self.__classAlias[class_or_alias] = alias, getstate
        self.__aliasClass[alias] = class_or_alias, setstate

        return class_or_alias

    def __serialRange(self, x, level, p="x"):
        '''Auxiliar analyzer for xrange/range params.

        Args:
            x: range-generated list or xrange iterator.
            level: current recursion level.
            p: string which will be prepended to output.
               Defaults to x (xrange).

        Returns:
            Serialized xrange or range-generated list as string.'''
        if x:
            s=x[0]
            t=x[1]-s
            e=x[-1]+t
            if t != 1: args = (s, e, t)
            elif s != 0: args = (s, e)
            else: args = (e,)
            return self.__serialIterable(args, level, p, self.__serializer[types.IntType])
        return "%sz" % p

    def __serialIterable(self, x, level, p, dumper):
        '''Serializer for iterables.

        Args:
            x: iterable.
            level: current recursion level.
            p: string which will be prepended to output.
            dumper: parser will be used for elements (unbounded).

        Returns:
            Serialized iterable as string.'''
        if x:
            nl = level + 1
            sep = ";%d;" % level
            return "%s%s" % (p, sep.join(dumper(self, i, nl) for i in x))
        return p

    def __serialDict(self, x, level, p = "d"):
        '''Serializer for dict-likes.

        Args:
            x: iterable.
            level: current recursion level.
            p: string which will be prepended to output.
               Defaults to d (dict).

        Returns:
            Serialized iterable as string.'''
        if x:
            nl = level + 1
            sep = ";%d;" % level
            return "%s%s" % (p, sep.join(
                    "%s%s%s" % (self.__dumps(k, nl), sep, self.__dumps(v, nl))
                    for k, v in x.iteritems())
                    )
        return p

    def __serialList(self, x, level, p = "l"):
        '''Serializer for lists. We take care of range-generated lists
        if self.select_range is True.

        Args:
            x: iterable.
            level: current recursion level.
            p: string which will be prepended to output.
               Defaults to l (list).

        Returns:
            Serialized iterable as string.'''
        if (self.__detect_range and len(x) > 1 and isinstance(x[0], int)
            and isinstance(x[1], int) and isinstance(x[-1], int) and
            x == range(x[0], x[-1] + (x[1]-x[0]), x[1]-x[0])):
            return self.__serialRange(x, level, "r")
        return self.__serialIterable(x, level, "l", self.__class__.__dumps)

    def __serialInstance(self, x, level, p = "o"):
        '''Serialize class object.

        Args:
            x: class object instance.
            l: current recursion level, defaults to 0.
            p: string which will be prepended to output.
               Defaults to o (object).

        Returns:
            Serialized instance as string.'''
        try:
            alias, getstate = self.__classAlias[x.__class__]
            return "%s%s:%s" % (p,
                self.__escape(alias),
                self.__dumps(getstate(x), level)
                )
        except KeyError:
            raise ClassRegistrarionError,"%s not registered" % x.__class__.__name__

    __serializer = {
        types.BooleanType:lambda self,x,l:"t" if x else "f",
        types.ComplexType:lambda self,x,l:"j%g%s%g" % (x.real, "" if x.imag < 0 else "+", x.imag),
        types.DictProxyType:__serialDict,
        types.DictType:__serialDict,
        types.FloatType:lambda self,x,l:"a%g" % x,
        types.GeneratorType:lambda self,x,l:self.__serialIterable(x, l, "c", self.__class__.__dumps),
        types.InstanceType:__serialInstance,
        types.IntType:lambda self,x,l:"z" if x == 0 else "i%d" % x,
        types.ListType:__serialList,
        types.LongType:lambda self,x,l:"g%g" % x,
        types.NoneType:lambda self,x,l: "n",
        types.StringType:lambda self,x,l:"s%s" % self.__escape(x),
        types.TupleType:lambda self,x,l:self.__serialIterable(x, l, "p", self.__class__.__dumps),
        types.UnicodeType:lambda self,x,l:"u%s" % self.__escape(x).encode('unicode_escape'),
        types.XRangeType:__serialRange,
        }

    def __dumps(self, x, level = 0):
        '''Recursive _dump (aka. serializer) function.

        Args:
            x: serializable object.
            level: current recursion level, defaults to 0.

        Returns:
            Serialized object as string.'''
        tx = type(x)
        if tx in self.__serializer:
            return self.__serializer[tx](self, x, level)
        if tx in self.__classAlias:
            return self.__serialInstance(x, level)
        for px in x.__class__.__mro__:
            if px in self.__serializer:
                return self.__serializer[px](self, x, level)
        raise SerializationError, "Non serializable object: %s" % type(x)

    def dumps(self, x, protocol = None):
        '''Converts any serializable object to string.

        Args:
            x: serializable object.
            protocol: dummy arg for pickler interchangeability.

        Returns:
            Serialized object as string.
        '''
        data = self.__dumps(x)
        if self.compression == 0: return "%s:%s" % (__version__, data)
        return "%s:y%s" % (__version__, zlib.compress(data, self.compression))

    def __liter(self, x, level):
        '''Generates a python generator from serialized string with level
        separators.

        Args:
            x: serialized iterable.
            level: current recursion level.

        Returns:
            Python generator (iterable).'''
        if x:
            nl = level+1
            return (self.__loads(i, nl) for i in x.split(";%d;" % level))
        return ()

    def __ldict(self, x, level):
        '''Generates a python dict from serialized string with level
        separators.

        Args:
            x: serialized iterable.
            level: current recursion level.

        Returns:
            Python dict.'''
        if x:
            nl = level+1
            se = iter(x.split(";%d;" % level))
            return {self.__loads(s, nl): self.__loads(se.next(), nl) for s in se}
        return {}

    def __linstance(self, x, level):
        '''Unserialize class object.

        Args:
            x: serialized object string.
            l: current recursion level, defaults to 0.

        Returns:
            Class object instance.'''
        alias, data = x.split(":", 1)
        try:
            cls, setstate = self.__aliasClass[self.__unescape(alias)]
            if setstate.im_self:
                # Is bounded
                obj = setstate(self.__loads(data, level))
            else:
                # Needs an instance
                obj = cls.__new__(cls) if hasattr(cls,"__new__") else types.InstanceType(cls)
                setstate(obj, self.__loads(data, level))
            return obj
        except KeyError:
            raise ClassRegistrarionError," %s is not registered." % alias

    def __escape(self, x):
        '''Escapes reserved chars:
            & used for scaped chars.
            ; used for item separators.
            : used for classes.

        Args:
            x: string will be escaped.

        Returns:
            Escaped string.'''
        return x.replace("&","&a").replace(";","&c").replace(":","&d")

    def __unescape(self, x):
        '''Unescapes reserved chars. See `SerializerClass.__escape`.

        Args:
            x: string to be escaped.

        Returns:
            Unescaped string.'''
        return x.replace("&d",":").replace("&c",";").replace("&a","&")

    __parser = {
        "y":lambda self,x,l:self.__loads(zlib.decompress(x), l),
        "s":lambda self,x,l:self.__unescape(x),
        "u":lambda self,x,l:self.__unescape(x).decode('unicode_escape'),
        "t":lambda self,x,l:True,
        "f":lambda self,x,l:False,
        "n":lambda self,x,l:None,
        "z":lambda self,x,l:0,
        "i":lambda self,x,l:int(x),
        "a":lambda self,x,l:float(x),
        "g":lambda self,x,l:long(x),
        "j":lambda self,x,l:complex("%sj" % x),
        "x":lambda self,x,l:xrange(*self.__liter(x, l)),
        "r":lambda self,x,l:range(*self.__liter(x, l)),
        "p":lambda self,x,l:tuple(self.__liter(x, l)),
        "l":lambda self,x,l:list(self.__liter(x, l)),
        "c":__liter,
        "d":__ldict,
        "o":__linstance
        }

    def __loads(self, x, level=0):
        '''Converts serialized string to Python object.

        Args:
            x: serialized string.
            level: current recursion level. Defaults to 0.

        Returns:
            Unserialized object.'''
        try:
            return self.__parser[x[0]](self, x[1:], level)
        except KeyError:
            raise UnserializationError,"String not unserializable: %s " % x
        except IndexError:
            raise UnserializationError,"String not serializable due empty object."
        except TypeError:
            raise UnserializationError,"Cannot serialize non-strings"

    __legacy_instances = {}
    def __legacy_loads(self, x, v):
        '''Generate and cache PRSerializer instances for legacy
        PRSerializer formats and unserializes.

        Args:
            x: serialized string
            v: old version definition.

        '''
        if v not in self.__legacy_instances:
            legacy = type("LegacySerializer%s" % v, (self.__class__,), {})()
            if v == "1.2.3":
                legacy.__parser = self.__parser.copy()
                legacy.__parser["d"] = lambda self, x, l: dict(self.__liter(x, l))
                legacy.__parser["j"] = lambda self, x, l: complex(x)
            self.__legacy_instances[v] = legacy
        return self.__legacy_instances[v].__loads(x)

    def loads(self, x):
        '''Converts serialized string to Python object.

        Args:
            x: serialized string.

        Returns:
            Unserialized object.'''
        version = x.split(":", 1)[0] if x[0].isdigit() else "1.2.3" # Version data was added on 1.2.4
        if version == __version__:
            return self.__loads(x[len(version)+1:])
        return self.__legacy_loads(x, version)


# Shared module-level serializer backing the convenience functions below.
Serializer = SerializerClass()

# Module-level shortcuts, all bound to the shared serializer.
dumps = Serializer.dumps
loads = Serializer.loads
register = Serializer.serializable
serializable = Serializer.serializable # also usable as class decorator

